#---------------------------------------------------------------------------------------------#
#Basic setup procedure...---------------------------------------------------------------------#
#---------------------------------------------------------------------------------------------#
#This first thing is just a standard "housekeeping" step to tidy up the workspace...
rm(list = ls())
#Used for stata importing
library(Hmisc)
library(xtable)
library(ggplot2)
#---------------------------------------------------------------------------------------------#
stattab.orig = stata.get("apms07arch.dta")
stattab = stattab.orig
#---------------------------------------------------------------------------------------------#
#This simply removes all entires with an age of less than 18, as per the age-range that *seems* to be used throughout
stattab = stattab[stattab$resage > 17,]
#---------------------------------------------------------------------------------------------#
#Some functions put together here...----------------------------------------------------------#
#---------------------------------------------------------------------------------------------#
#Convenience function for grabbing out multiple columns for further manipulation
grabcols = function(cols)
{
  sel.cols = paste(c(cols), collapse="|")
  sel.cols = grep(sel.cols, colnames(stattab))
  sel.tab = stattab[,sel.cols]
  return(sel.tab)
}
#---------------------------------------------------------------------------------------------#
#Onto our first bit of replication...---------------------------------------------------------#
#---------------------------------------------------------------------------------------------#
#Ok, all a bit more tidy now
#Next up we need to locate our important columns here: Suicidal thoughts, suicide attempts + SH
#dshlife, dshtry and dshharm
#Make this a little robust in case we want to expand later on...
#Found dshharm etc by cross-checking with datadocs and report appendices - self completion data
#This is a tricky bit, several similar questions + not clearly describing which var is which!
#---------------------------------------------------------------------------------------------#
#Here I'm just putting together the basic contingency table-----------------------------------#
#---------------------------------------------------------------------------------------------#
fulltab = NULL
basetabvars = c("dshlife", "dshtry", "dshharm")
for(i in 1:length(basetabvars))
{
  #Make basic two-way table
  currtab = eval(parse(text=paste("xtabs(formula = ~",basetabvars[i]," + age20yr, data=stattab)")))
  #Then work out age proportions using this
  curryes = match("yes", rownames(currtab))
  currno = match("no", rownames(currtab))
  #Then actually form a table of the quantities we want to look at...
  currsum = currtab[c(curryes, currno),6:9]
  currsum = cbind(currsum, rowSums(currsum))
  #Before finding proportions
  currprop = prop.table(currsum, 2)
  #And gluing it all together
  fulltab = rbind(fulltab, currprop[1,])
}
#---------------------------------------------------------------------------------------------#
#So, that's proportions sorted, but now we want to get it all looking pretty...
#---------------------------------------------------------------------------------------------#
#Get as percentages
fulltab = round(fulltab * 100, 1)
#Finally, sort out naming
rownames(fulltab) = c("Suicidal thoughts", "Suicide attempts", "Self-harm")
colnames(fulltab) = c("18-34", "35-54", "55-74", "75+", "All")
#Ta-dah!
print(fulltab)
#Write into LaTeX-friendly form
print(xtable(fulltab))
#---------------------------------------------------------------------------------------------#
#Now just want to compare with original table a bit...
#---------------------------------------------------------------------------------------------#
#So, issues here - doesn't quite tally with actual table - not entirely sure why.
#And if we test against first row, it seems individuals have been lost somewhere?
orig1 = c(19.7, 20.1, 12.7, 3.5)
orig2 = c(6.9, 6.4, 4.4, 1.3)
orig3 = c(9.3, 4.8, 1.2, 0.5)
orig.test = rbind(orig1, orig2, orig3)
#Fairly minor divergences, but something seems missing?
rowSums(fulltab[,1:4] - orig.test)
#Really not sure why the difference exists here, have tried many manipulations to resolve it but no joy
#---------------------------------------------------------------------------------------------#
#Next replication bit, preparation for first figures------------------------------------------#
#---------------------------------------------------------------------------------------------#
#Note "an ad hoc summative social capital score (denoted SCS) is used"
#So, let's get calculating these for everyone!
#First, need to work out which cols will correspond to each of these, lots of comments to follow...
#---------------------------------------------------------------------------------------------#
#Note these are NOT recorded as binary, so not super-simple to break them down here...
#Based on the table of variables, on the far right are written what the column names are (NOTE CAPITAL LETTERS!)
#X1 - FF make happy - dlss1
#X2 - FF make loved - dlss2
#X3 - FF relied upon - dlss3
#X4 - FF see taken care of - dlss4
#X5 - FF accept me - dlss5
#X6 - FF make me feel important - dlss6
#X7 - FF give support - dlss7
#X8 - Feel belong - Belong
#X9 - Trust here - Trust
#X10 - Enjoy here - Enjoy
#X11 - Real home - Realhme
#X12 - Feel safe day - Safe
#X13 - Like move - Move
#X14 - Nicely kept - Resident
#X15 - Litter problem - Litter
#X16 - Graffiti problem - Graffit
#X17 - Participate community - ComGrp
#X18 - Involved clubs - noclub
#---------------------------------------------------------------------------------------------#
varnames = c("Suicidal behaviours",
             "Y1 ideation", "Y2 attempt", "Y3 self-harm",
             "Social capital",
             "X1","X2","X3","X4","X5","X6","X7","X8",
             "X9","X10","X11","X12","X13","X14","X15",
             "X16","X17","X18")
descs = c("",
          "There may be times in everyone's life when they become very miserable and depressed and may feel like taking drastic action because of these feelings. Have you ever thought of taking your life, even if you would not really do it?",
          "Have you ever made an attempt to take your life, by taking an overdose of tablets or in some other way?",
          "Have you ever deliberately harmed yourself in any way but not with the intention of killing yourself?",
          "",
          "Family and friends do things to make me happy (certainly true = 1, not true/partly true = 0)",
          "Family and friends make me feel loved (certainly true = 1, not true/partly true = 0)",
          "Family and friends can be relied on no matter what happens (certainly true = 1, not true/partly true = 0)",
          "Family and friends would see that I am taken care of if I needed to be (certainly true = 1, not true/partly true = 0)",
          "Family and friends accept me just the way I am (certainly true = 1, not true/partly true = 0)",
          "Family and friends make me feel an important part of their lives (certainly true = 1, not true/partly true = 0)",
          "Family and friends give me support and encouragement (certainly true = 1, not true/partly true = 0)",
          "I feel like I belong around here (strongly agree = 1, somewhat agree/disagreement/neutrality = 0)",
          "I trust people around here (strongly agree = 1, somewhat agree/disagreement/neutrality = 0)",
          "I enjoy living around here (strongly agree = 1, somewhat agree/disagreement/neutrality = 0)",
          "I think of the area around here as a real home not just a place (strongly agree = 1, somewhat agree/disagreement/neutrality = 0)",
          "I feel safe around here in the day time (strongly agree = 1, somewhat agree/disagreement/neutrality = 0)",
          "Given the opportunity I would like to move away from here (strongly agree = 1, somewhat agree/disagreement/neutrality = 0)",
          "The area around here is nicely kept by its residents (strongly agree = 1, somewhat agree/disagreement/neutrality = 0)",
          "Litter is a problem around here (strongly agree = 1, somewhat agree/disagreement/neutrality = 0)",
          "Graffiti or vandalism is a problem around here (strongly agree = 1, somewhat agree/disagreement/neutrality = 0)",
          "Participate in community group (at least once a month = 1, less than once a month/never = 0)",
          "Involved in clubs/activities (no = 1, yes = 0)")




realvar = c("",
            "dshlife", "dshtry", "dshharm",
            "",
            "dlss1", "dlss2", "dlss3", "dlss4", "dlss5", "dlss6",
            "dlss7", "Belong", "Trust", "Enjoy", "Realhme", "Safe",
            "Move", "Resident", "Litter", "Graffit", "ComGrp", "noclub")

realtab = cbind(varnames, realvar, descs)
colnames(realtab) = c("Variable name", "Dataset column name", "Description")

xtable(realtab)



#---------------------------------------------------------------------------------------------#
#Ok, so need to go through each row and work out individual scores, let's go!
relev.vars = c("dlss", "Belong", "Trust", "Enjoy", "Realhme", "Safe", "Move", "Resident", "Litter",
               "Graffit", "ComGrp", "noclub")
#Which cols are these to be found in (looking for 18 if my gambit pays off...)
relev.cols = grep(paste(relev.vars, collapse="|"), colnames(stattab), ignore.case=T)
#Ok, so now we have all of the columns we're going to be using for our social capital scores
#dlss and the others use slightly different scales
#dlss: Yes = certainly true
#Other non-club: Yes =  Agree
#Clubs: Yes = Mentioned
#Note that the levels of positivity are pretty weird (partly true as a negative response), but this seemed to give the
#closest replication of the graphs in the original paper.

#Could make this all into a nice function as it's very repetitive, but for now...
#---------------------------------------------------------------------------------------------#
#First up, dlss things
#---------------------------------------------------------------------------------------------#
#Just grab only the columns we want to be looking at, no need to worry about the others...
scs.tab = stattab[,relev.cols]
#Find out WHICH of the levels are the ones we're looking for (only the positive ones)
#poslevs.dlss = grep("partly true|certainly true", levels(scs.tab[,1]))
poslevs.dlss = grep("certainly true", levels(scs.tab[,1]))
#Could we be having a problem because I'm falsely using missing data?
neglevs.dlss = grep("not true|partly true", levels(scs.tab[,1]))
#neglevs.dlss = grep("not true", levels(scs.tab[,1]))
#Then for each of the dlss questions, 
for(i in 1:7){
  #Here we replace the positive levels with ones
  levels(scs.tab[,i])[poslevs.dlss] = 1
  #And negative/neutral ones with 0s
  #levels(scs.tab[,i])[which(levels(scs.tab[,i])!=1)] = 0
  levels(scs.tab[,i])[neglevs.dlss] = 0
}
#So, now these columns are set up as binary
#---------------------------------------------------------------------------------------------#
#Now for our various named things, positives a 1, negatives a 0 again (can flip this later on...)
#---------------------------------------------------------------------------------------------#
#Basically doing the same as above, but now with different "positive" thingies
#poslevs.oth = grep("strongly agree|somewhat agree", levels(scs.tab[,8]))
poslevs.oth = grep("strongly agree", levels(scs.tab[,8]))
neglevs.oth = grep("somewhat agree|strongly disagree|somewhat disagree|neither agree", levels(scs.tab[,8]))
#neglevs.oth = grep("strongly disagree", levels(scs.tab[,8]))
for(i in 8:16)
{
  levels(scs.tab[,i])[poslevs.oth] = 1
  #levels(scs.tab[,i])[which(levels(scs.tab[,i])!=1)] = 0
  levels(scs.tab[,i])[neglevs.oth] = 0
}
#---------------------------------------------------------------------------------------------#
#Community group is just weird, so has to be handled all alone
#---------------------------------------------------------------------------------------------#
#Note here that they ignore the other "positive" option of "at least once a year" - screwed me over for a bit
poslevs.cgrp = grep("at least once a month", levels(scs.tab[,17]))
neglevs.cgrp = grep("year", levels(scs.tab[,17]))

levels(scs.tab[,17])[poslevs.cgrp] = 1
#Again, need to do negative situation properly...
#levels(scs.tab[,17])[which(levels(scs.tab[,17]) != 1)] = 0
levels(scs.tab[,17])[neglevs.cgrp] = 0
#---------------------------------------------------------------------------------------------#
#And finally, deal with the community categories (there are LOTS)
#---------------------------------------------------------------------------------------------#
#Same basic approach, but have to do some regexp magic with grep to make sure it grabs the CORRECT positive
poslevs.ccat = grep("^mentioned", levels(scs.tab[,18]))
neglevs.ccat = grep("not mentioned", levels(scs.tab[,18]))
#neglevs.ccat = grep("not mentioned", levels(scs.tab[,18]))
#Note that we handle this differently - there is no true positive situation, can only count AGAINST SCS
levels(scs.tab[,18])[poslevs.ccat] = 0
#levels(scs.tab[,18])[which(levels(scs.tab[,18])!=0)] = 1
levels(scs.tab[,18])[neglevs.ccat] = 1
#---------------------------------------------------------------------------------------------#
#---------------------------------------------------------------------------------------------#
#---------------------------------------------------------------------------------------------#
#Ok, so now we need to collate these various valuations into our overall 'score'
#X1 - X12, X14, X17 all positive measures - find these in table
#X13, 15, 16, 18 negative measures
# All of the positive ones add +1 to score, negative -1
#Positive values, can simply be rowsummed
#Ah, first need to stick these through a bit of a mangle to scrape away all the factor stuff...
scs.tab = apply(apply(scs.tab, 2, as.character), 2, as.numeric)
#Clubs currently needs to be flipped - at present 1 for mentioned, 0 for unmentioned - not right
#So here we flip it such that not mentioned == 1, and mentioned == 0...
#scs.tab[,18] = (scs.tab[,18]*-1)+1
#Which are our +ve effect cols?
scs.pos = rowSums(scs.tab[,c(1:12, 14, 17)])
#And -ve?
scs.neg = rowSums(scs.tab[,c(13, 15, 16, 18)])
#Calculate final score
scs.total = scs.pos - scs.neg
#Aggregate super-low values...
scs.total[scs.total < -2] = -2
#---------------------------------------------------------------------------------------------#
#---------------------------------------------------------------------------------------------#
#---------------------------------------------------------------------------------------------#
#DSH3, DSH4, DSH5 - just try these out for giggles
suic.tab = grabcols(c("dshlife", "dshtry", "dshharm"))
#suic.tab = grabcols(c("dsh3", "dsh4", "dsh5"))
#We could make this a little more complex?
suic.tab$dshlife = as.numeric(suic.tab$dshlife=="yes")
suic.tab$dshtry = as.numeric(suic.tab$dshtry=="yes")
suic.tab$dshharm = as.numeric(suic.tab$dshharm=="yes")
# suic.tab$dsh3 = as.numeric(suic.tab$dsh3=="yes")
# suic.tab$dsh4 = as.numeric(suic.tab$dsh4=="yes")
# suic.tab$dsh5 = as.numeric(suic.tab$dsh5=="yes")
#---------------------------------------------------------------------------------------------#
#---------------------------------------------------------------------------------------------#
#---------------------------------------------------------------------------------------------#
#Next up, plotting SCS scores against our various suicide indicators
#The way this is done is "relative outcome" against various groupings of SCS score
#So proportions in each group again, I think?
#Very quick and dirty for now, but just to get it working...
#Also, note that figures in paper actually split by gender, so, identify male and female rows.
#---------------------------------------------------------------------------------------------#
#Function for basic plotting of measures of suicide against SCS/quintile----------------------#
#---------------------------------------------------------------------------------------------#
plotfun = function(suicmeasure, sex, socmeasure)
{
  #From our measures, which are we playing with...
  suicmeasures = c("dshlife", "dshtry", "dshharm")
  currmeasure = which(suicmeasures == suicmeasure)
  #These are ideation, attempt and harm respectively
  #Let's choose colors to match the paper plot...
  colors = c("cadetblue", "firebrick", "olivedrab")
  #Just need to bring together our scs measure with core table, for convenience
  if(socmeasure == "scs.total")
  {
    newtable = cbind(stattab, scs.total)
    #Makes sure we're not including incomplete cases
    newtable = newtable[complete.cases(newtable$scs.total),]
  }else{newtable = stattab}
  
  stat.fem = which(newtable$ressex == "female")
  stat.mal = which(newtable$ressex == "male")
  #Decide which chunk of table to look at
  if(sex == "female"){newtable = newtable[stat.fem,]}else{newtable = newtable[stat.mal,]}
  #Make table, from which we can calculate proportions
  figtab = eval(parse(text=paste("xtabs(formula = ~", suicmeasure, "+", socmeasure, ",data=newtable)")))
  #Proportions
  #figprops = figtab[7,] / apply(figtab[7:8,], 2, sum)
  #Just a bit tidier here...
  figprops = prop.table(figtab[7:8,], margin=2)[1,]
  #Plot!
  if(socmeasure == "scs.total")
  {points(as.numeric(names(figprops)), figprops, type = "l", lwd = 4, ylim = c(0,1), 
          col = colors[currmeasure])
  }else{
    points(1:5, figprops[6:10], type = "l", lwd = 4, ylim = c(0,1), col = colors[currmeasure])}
  #print(figprops)
}
#---------------------------------------------------------------------------------------------#
#In the interest of tidiness, put together a function which just handles running our plot function lots
#---------------------------------------------------------------------------------------------#
allplot = function(gender, socmeasure)
{
  #Ensures suitable plot ranges
  if(socmeasure == "scs.total")
  {
    currxlab = "Social Capital Score"
    my.xlim = c(-2,14)
    if(gender=="female")
    {my.ylim = c(0,0.9)}else{my.ylim = c(0,0.5)}
    my.xlab = "Social Capital Score"
    leg.pos = "top"
  }else{
    currxlab = "Deprivation Quintile"
    my.xlim = c(1,5)
    if(gender=="female")
    {my.ylim = c(0,0.3)}else{my.ylim = c(0,0.18)}
    my.xlab = "Deprivation Quintile"
    leg.pos = "center"
  }
  #Plot base layer
  plot(1, xlim = my.xlim, ylim = my.ylim, xlab = currxlab, ylab = "Outcome rate", type = "n",xaxt = "n",
       cex.lab = 1.5, cex.axis = 1.5)
  #Choose suitable axis
  if(socmeasure != "scs.total")
  {
    axis(1, at = 1:5, labels = 1:5, cex.axis = 0.9)
  }else{
    axis(1, at = -4:14, cex.axis = 0.8)
  }
  #Actually do our plots
  plotfun("dshlife", gender, socmeasure)
  plotfun("dshtry", gender, socmeasure)
  plotfun("dshharm", gender, socmeasure)
  #Choose right title
  if(gender == "female")
  {title(main = "Female outcomes")
  }else{
    title(main = "Male outcomes")}
  #Aaand add our legend
  legend(leg.pos,c("Ideation-(dshlife)", "Attempt-(dshtry)", "Harm-(dshharm)"), 
         col = c("cadetblue", "firebrick", "olivedrab"), lwd = 4, bty="n")
}
#--------------------------------------------------------------------------------------------------#
#Finally, call in our functions and write everything to the pdf "Figures.pdf" in the working directory
#--------------------------------------------------------------------------------------------------#
pdf(file="Figures.pdf", width = 12, height = 6)
par(mfrow = c(1,2))
allplot("male", "scs.total")
allplot("female", "scs.total")
allplot("male", "newtable$qimd")
allplot("female", "newtable$qimd")
dev.off()
#**************************************************************************************************#
#ALL BELOW HERE IS WORK ON SEM - TRYING TO IDENTIFY CORRECT MEASURES AND WORK THEM INTO SUITABLE FORMAT
#**************************************************************************************************#
#May as well continue with SEM stuff here, as need all the binary variables set up
#K=3 binary response variables == dshlife, dshtry, dshharm
#P=18 indicators of socap. == X1:X18
#***NEW BIT*** 
#R=5 indicators of socioeconomics == hh income - log(eqvinc), degree or not - edqual5 == degree,
#employed or not - nssecr == any job?, owner occupier or not - tenure, 
#income benefit or not - incben7 I think?
#Q=3 isolation indicators == One person household - dvhsize, migrant - hwlong I *think*, 
#married - resmardf
#Location = gor06

#So, we already have out scs.tab which pretty much does all we need for the socap bit - beware noclub
#--------------------------------------------------------------------------------------------------#
#SES up next
#--------------------------------------------------------------------------------------------------#
ses.tab = grabcols(c("^eqvinc$", "edqual5", "^dvilo3a$", "tenure", "incben7"))
#--------------------------------------------------------------------------------------------------#
#Now need to actually work things into a usable state
#Hmm, bit of a fudgey way to handle this, ideally want to purge NAs, but they are fairly minimal?
ses.tab$eqvinc = round(log10(ses.tab$eqvinc))
ses.tab$edqual5 = as.numeric(ses.tab$edqual5 == "degree")
ses.tab$dvilo3a = as.numeric(ses.tab$dvilo3a == "in employment or unpaid family worker")
ses.tab$tenure = as.numeric(ses.tab$tenure == "yes - owner-occupier")
ses.tab$incben7 = as.numeric(ses.tab$incben7 == "mentioned")
colnames(ses.tab) = c("z[,1]", "z[,2]", "z[,3]", "z[,4]", "z[,5]")
#--------------------------------------------------------------------------------------------------#
#And now for isolation-----------------------------------------------------------------------------#
#--------------------------------------------------------------------------------------------------#
isol.tab = grabcols(c("dvhsize", "hwlong", "resmardf"))
#--------------------------------------------------------------------------------------------------#
isol.tab$dvhsize = as.numeric(isol.tab$dvhsize == 1)
isol.tab$hwlong = as.numeric(isol.tab$hwlong == "less than one year")
isol.tab$resmardf = as.numeric(isol.tab$resmardf == "married")
colnames(isol.tab) = c("x[,1]", "x[,2]", "x[,3]")
#--------------------------------------------------------------------------------------------------#
srs.tab = grabcols(c("dshlife", "dshtry", "dshharm"))
srs.tab$dshlife = as.numeric(srs.tab$dshlife == "yes")
srs.tab$dshtry = as.numeric(srs.tab$dshtry == "yes")
srs.tab$dshharm = as.numeric(srs.tab$dshharm == "yes")
colnames(srs.tab) = c("y[,1]", "y[,2]", "y[,3]")
#--------------------------------------------------------------------------------------------------#
oth.tab = grabcols(c("ethnic4", "resage", "eqvinc5"))
#resage is just fine for the moment
oth.tab$ethnic4 = as.numeric(oth.tab$ethnic4 == "white")
levels(oth.tab$eqvinc5)[6:10] = c(45.22, 26.61, 17.02, 10.96, 5.74)
oth.tab$eqvinc5 = as.numeric(as.character(oth.tab$eqvinc5))
colnames(oth.tab) = c("w[,1]", "w[,2]", "w[,3]")
#--------------------------------------------------------------------------------------------------#
#Also need things like regional effects and demographic stuff... Maybe...
#Not really sure how regional stuff works, so demographics probably easiest to incorporate
#--------------------------------------------------------------------------------------------------#
#Really want everything in a big ol' table, so do just that, then write to file--------------------#
#--------------------------------------------------------------------------------------------------#
complete.table = cbind(isol.tab, srs.tab, ses.tab, oth.tab)
complete.table = complete.table[-which(rowSums(apply(complete.table, 2, complete.cases)) < ncol(complete.table)),]
write.table(complete.table,"Complete.table.csv", quote=F, row.names=F)
#--------------------------------------------------------------------------------------------------#
#Just playing around a little with RJAGS...
# model{
# for(i in 1:n)
# {
#   ISOL[i] = dnorm(0,1)
#   for(j in 1:3)
#   {
#     x[i,j] = dnorm(mu.x[i,j], betax[j])
#     mu.x[i,j] = alphax[j]+gammax[j]*ISOL[j]
#   }
# }
# 
# for(j in 1:3)
# {
#   alphax[j] ~ dnorm(0,0.001);
#   betax[j] ~ dnorm(0,0.001)
#   gammax[j] ~ dnorm(0,0.001)
# }
# }



